package com.hdaccp.ch02

import org.apache.spark.sql.SparkSession

/**
  * 分析csv文件
  */
object Demo2 {

  /** One user record parsed from a line of the input file. */
  final case class Users(id: Int, name: String, address: String, email: String, sex: String)

  /**
    * Loads user records from a delimited text file, registers them as a
    * temporary SQL view, and prints the number of users per address.
    *
    * @param args optional; args(0) overrides the input file path
    *             (defaults to "f:/resources/users.csv").
    */
  def main(args: Array[String]): Unit = {
    // Allow the input path to be supplied on the command line; fall back to
    // the original hard-coded location for backward compatibility.
    val inputPath = if (args.nonEmpty) args(0) else "f:/resources/users.csv"

    // 1. Obtain the SparkSession.
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("ch02Demo2App")
      .getOrCreate()
    // 2. Import Spark's implicit conversions (required for .toDF()).
    import spark.implicits._

    val rdd = spark.sparkContext.textFile(inputPath)
    // 3. Convert the RDD to a DataFrame.
    //    NOTE(review): the file is named .csv but lines are split on '\t' —
    //    confirm the data is actually tab-delimited.
    //    (split already yields Strings, so no .toString is needed before .toInt)
    val df = rdd
      .map(_.split("\t"))
      .map(arr => Users(arr(0).toInt, arr(1), arr(2), arr(3), arr(4)))
      .toDF()

    // 4. Register a temporary view so the data can be queried with SQL.
    df.createOrReplaceTempView("users")

    //  df.sqlContext.sql("select * from users").show()

    // Count users per address. Use spark.sql directly instead of the
    // legacy df.sqlContext accessor.
    spark.sql("select address,count(id) from users group by address").show()

    spark.stop()
  }
}
